********************************************************************************
** 	TITLE: b0_zipcode_variables.do
**
**	PROJECT: IGNITE
** 
**  PURPOSE: Cleans census tract and zipcode variables to merge with Genesee County
**	JMS data.
**	
**	Note: Variables used produce Auxiliary Controls in B5
********************************************************************************
		set sortseed 13

/********************************************************************************
* Clean Census Tract ACS
********************************************************************************/

****************************
* Prepare ACS 5 Year
****************************

*Demographics
foreach j in 2016 {
	import delimited "$input_data/census_tract/acs_5y_census_tract/ACSDP5Y2016.DP05_2023-04-12T112158/ACSDP5Y2016.DP05-Data.csv", clear varname(1)
	drop if _n==1
	gen tract = substr(geo_id,-6,.)
	gen countyfips = substr(geo_id,-9,3)
	gen id = countyfips+tract
	keep tract countyfips id dp05_0001e dp05_0003pe dp05_0021pe dp05_0060pe dp05_0066pe dp05_0018e
//Convert into float storage type	
	ren (dp05_0001e dp05_0003pe dp05_0021pe dp05_0060pe dp05_0066pe dp05_0018e) (total_pop pc_female pc_65plus pc_black pc_hispanic total_18plus)
	destring total_pop total_18plus pc*, force replace float 
	tempfile demo_`j'
	save `demo_`j''
}

clear
foreach j in 2016{
	append using `demo_`j''
}

collapse (mean) total_pop total_18plus pc*, by(id)
tempfile demo
save `demo'


*Median Household Income
foreach j in 2016{
	import delimited "$input_data/census_tract/acs_5y_census_tract/ACSST5Y2016.S1903_2023-04-12T112114/ACSST5Y`j'.S1903-Data.csv", clear varname(1)
	drop if _n==1
	gen tract = substr(geo_id,-6,.)
	gen countyfips = substr(geo_id,-9,3)
	gen id = countyfips+tract
	keep tract countyfips id s1903_c02_001e
	ren (s1903_c02_001e) (med_hhincome)
	destring med_hhincome, force replace
	tempfile income_`j'
	save `income_`j''
}

clear
foreach j in 2016{
	append using `income_`j''
}
collapse (mean) med_hhincome, by(id)
tempfile income
save `income'


*Education
foreach j in 2016{
	import delimited "$input_data/census_tract/acs_5y_census_tract/ACSST5Y2016.S1501_2023-04-12T104028/ACSST5Y`j'.S1501-Data.csv", clear varname(1)
	drop if _n==1
	gen tract = substr(geo_id,-6,.)
	gen countyfips = substr(geo_id,-9,3)
	gen id = countyfips+tract
	keep tract countyfips id s1501_c02_014e s1501_c02_015e
//Convert into float storage type
	ren (s1501_c02_014e s1501_c02_015e) ///
		(pc_hs pc_clg)
	destring pc_hs pc_clg, force replace float 
	tempfile edu_`j'
	save `edu_`j''
}

clear
foreach j in 2016{
	append using `edu_`j''
}
collapse (mean) pc_hs pc_clg, by(id)

*merge
merge 1:1 id using `income', nogen
merge 1:1 id using `demo', nogen

****************************
* Prepare Blood Lead Data
****************************

preserve
import delimited "$input_data/census_tract/lead_blood_level_by_tract/LeadBloodLevels_2017_byTract_20181129.csv", clear varname(1) stringcol(1 2 3)
gen tract = substr(geoid10,-6,.)
gen id = substr(geoid10,-9,.)
collapse (sum) cnttested ebll under6cnttested under6ebll under18cnttested under18ebll, by(id)
tempfile lead
save `lead'
restore

merge 1:1 id using `lead', nogen
save "$output_data/census_vars.dta", replace